Dimensional Model Scripts

01 Table Creation

CREATE TABLE IF NOT EXISTS tr_wi_2023.zip_county_wda_xwalk
(
    zip VARCHAR(5) NOT NULL  ENCODE lzo
    ,county VARCHAR(11)   ENCODE lzo
    ,wda SMALLINT   ENCODE az64
    ,PRIMARY KEY (zip)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.zip_county_wda_xwalk owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.zip_county_wda_xwalk TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.zip_county_wda_xwalk TO group db_t00111_ro;

------------------------

CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_rdim_week
(
    week_id SMALLINT NOT NULL  ENCODE az64
    ,week_code DATE NOT NULL  ENCODE az64
    ,quarter_code CHAR(6)   ENCODE lzo
    ,calendar_year SMALLINT   ENCODE az64
    ,calendar_quarter SMALLINT   ENCODE az64
    ,PRIMARY KEY (week_id)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_week owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_week TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_rdim_week TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_week TO group db_t00111_ro;

-----

CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_industry
(
    code VARCHAR(18)   ENCODE lzo
    ,title VARCHAR(357)   ENCODE lzo
    ,description VARCHAR(24000)   ENCODE lzo
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_industry owner to dbadmin11;

-- Permissions

GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO dbadmin11;
GRANT INSERT, SELECT, UPDATE,  DELETE ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_industry TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_industry TO group db_t00111_ro;

-----------------------
CREATE TABLE IF NOT exists tr_wi_2023.wi_rdim_occupation
(
    SOC_code VARCHAR(18)   ENCODE lzo
    ,SOC_title VARCHAR(357)   ENCODE lzo
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_rdim_occupation owner to dbadmin11;

-- Permissions

GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO dbadmin11;
GRANT INSERT, SELECT, UPDATE,  DELETE ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_rdim_occupation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_rdim_occupation TO group db_t00111_ro;

------------------------

CREATE TABLE IF NOT exists tr_wi_2023.wi_mdim_person
(
    ssn VARCHAR(64)   ENCODE lzo
    ,person_id int identity not null
    ,gender INT   ENCODE az64
    ,race VARCHAR(3) ENCODE lzo
    ,ethnicity VARCHAR(3) ENCODE lzo
    ,birth_date DATE ENCODE az64
    ,primary key(person_id)
)
DISTSTYLE AUTO
;
ALTER TABLE tr_wi_2023.wi_mdim_person owner to dbadmin11;

-- Permissions

GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO dbadmin11;
GRANT INSERT, SELECT, UPDATE,  DELETE ON TABLE tr_wi_2023.wi_mdim_person TO group ci_data_eng;
GRANT ALL ON TABLE tr_wi_2023.wi_mdim_person TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_mdim_person TO group db_t00111_ro;

------------------------ 

CREATE TABLE IF NOT EXISTS tr_wi_2023.wi_fact_weekly_observation
(
    person_id int not null  
    ,week_ending_date date
    ,week_id smallint NOT NULL  
    ,benefit_claimed varchar(1) not null 
    ,benefit_yr_start date
    ,normal_benefit_received varchar(1) not null
    ,ic_claim_date date
    ,last_employer_naics varchar(6)
    ,last_employer int
    ,eligible_benefit_amount int
    ,earnings_during_wk varchar(3)
    ,entitlement int
    ,veteran_status int4
    ,commuter varchar(3)
    ,education varchar(3)
    ,disability varchar(3)
    ,ic_type varchar(3)
    ,occupation varchar
    ,program_type varchar(1)
    ,status_code varchar(3)
    ,stop_payment_indicator varchar(1)
    ,waiting_week varchar(1)
    ,res_zip varchar(5)
    ,employed_in_quarter varchar(1) not null 
    ,calendar_year smallint
    ,calendar_quarter smallint 
    ,primary_employer_id varchar(10)
    ,primary_employer_wages int
    ,total_wages int
    ,employer_count SMALLINT
    ,PRIMARY KEY (person_id, week_id)
)
DISTSTYLE auto
;
ALTER TABLE tr_wi_2023.wi_fact_weekly_observation owner to dbadmin11;
GRANT ALL ON TABLE tr_wi_2023.wi_fact_weekly_observation TO dbadmin11;
GRANT SELECT, INSERT, DELETE, UPDATE ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_data_eng;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group ci_read_group;
GRANT SELECT ON TABLE tr_wi_2023.wi_fact_weekly_observation TO group db_t00111_ro;

ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (person_id) 
REFERENCES tr_wi_2023.wi_mdim_person(person_id)

ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (week_id) 
REFERENCES tr_wi_2023.wi_rdim_week(week_id)

ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (res_zip) 
REFERENCES tr_wi_2023.wi_rdim_zip_county_wda_xwalk(zip)

ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (occupation) 
REFERENCES tr_wi_2023.wi_rdim_occupation(soc_code)

ALTER TABLE tr_wi_2023.wi_fact_weekly_observation
ADD FOREIGN KEY (last_employer_naics) 
REFERENCES tr_wi_2023.wi_rdim_industry(code)

02 Linkage Reference Dimension Load Scripts

--populate week table 
CREATE OR REPLACE PROCEDURE populating_wi_rdim_week() --need TO CREATE PROCEDURE 
LANGUAGE plpgsql
as $$
declare
    StartDate DATE := '2006-01-07'; -- setting START date 
    EndDate DATE := '2023-12-30'; -- setting END date 
    Date DATE := StartDate; -- setting the variable date TO increate WITH EACH loop 
    ID smallint :=1; -- setting id variable 

begin
    while Date <= EndDate loop -- starting the while loop 
        
        raise notice 'this is the date %', Date; -- printing OUT what date im on
    
        insert into tr_wi_2023.wi_rdim_week (
        week_id, 
        week_code, 
        quarter_code, 
        calendar_year, 
        calendar_quarter
        )
        values(
        ID, -- Quarter ID
        Date, -- Week Code
        CAST(DATE_PART(y,Date) AS CHAR(4)) + 'Q' + CAST(DATE_PART(qtr,Date) AS CHAR(1)), --Quarter_Code
        DATE_PART(y,Date), --Calendar_Year
        DATE_PART(qtr,Date) --Calendar_Quarter
        );
        Date := dateadd(w,1,Date); --increasing the date variable 
        ID := ID + 1; -- incresing the id variable 
    end loop;
    raise info 'loop ended';

end;
$$;

call populating_wi_rdim_week();


-------
-- populate occupation table

--only 2-digit soc presently reported in PROMIS
select distinct substring(occupation, 1, 2) from ds_wi_dwd.promis p order by 1;
select distinct substring(occupation, 3, 8) from ds_wi_dwd.promis p;


--can join to soc_xwalk where soc_group = 'Major' without duplicating join 
--when just using first two characters in soc_title from soc_xwalk 
select distinct p.occupation, sx.soc_title  
from ds_wi_dwd.promis p 
left join ds_public_1.soc_xwalk sx 
on substring(p.occupation, 1, 2) = substring(sx.soc_code, 1, 2) and sx.soc_group = 'Major'
order by 1;

--will take just first two characters from soc_code into reference table and limit xwalk to just rows where 
-- soc_group = 'Major'
insert into tr_wi_2023.wi_rdim_occupation(
    soc_code,
    soc_title
)
select 
substring(soc_code, 1, 2) as soc_code,
soc_title
from ds_public_1.soc_xwalk sx 
where sx.soc_group = 'Major'


--- industry table
insert into tr_wi_2023.wi_rdim_industry(
    code,
    title,
    description
)
select 
code,
title,
description
from ds_public_1.naics_descriptions 

03 Person Master

--MASTERING RULES: IF GENDER, RACE, ETHNICITY, OR BIRTH_DATE CHANGE FOR AN SSN OVER TIME
-- TAKE THE MOST COMMON VALUE. IN TIES, PRIORITIZE KNOWN VALUES OVER UNKNOWN

-- first will find number of ssn values this concerns 
with mult as (
    SELECT ssn
    FROM ds_wi_dwd.promis p
    group by ssn 
    having count(distinct(gender)) > 1 or count(distinct(race)) > 1 or count(distinct(ethnicity)) > 1 or count(distinct(birth_date)) > 1 
)
select count(distinct(ssn))
from mult 


--MASTERING CODE 
insert into tr_wi_2023.wi_mdim_person(
    ssn
    ,gender
    ,race
    ,ethnicity 
    ,birth_date
)
with gender_tab as
(
    select ssn 
        , gender 
        , ROW_NUMBER() over(partition by ssn order by count(*) desc,
        --PRIORITIZE KNOWN GENDER 1 AND 2 OVER UNKNOWN 0 AND 3
            case when gender = 0 then 2
            when gender = 1 then 1 
            when gender = 2 then 1 
            when gender = 3 then 2
            else 3 end 
        ) as RowNum
    from ds_wi_dwd.promis 
    group by ssn, gender    
),
race_tab as
(
    select ssn 
        , race 
        , ROW_NUMBER() over(partition by ssn order by count(*) desc,
        --PRIORITIZE KNOWN RACE OVER UNKNOWN, UNKNOWN OVER NULL 
            case when race in (1,2,3,4,5,8) then 1
            when race in (0, 6) then 2
            when race is null then 3
            else 4 end 
    ) as RowNum
    from ds_wi_dwd.promis 
    group by ssn, race 
),
ethnicity_tab as (
    select ssn 
        , ethnicity 
        , ROW_NUMBER() over(partition by ssn order by count(*) desc,
        --PRIORITIZE KNOWN ETHNICITY OVER UNKNOWN, UNKNOWN OVER NULL 
            case when ethnicity in ('Y', 'N') then 1
            when ethnicity = '*' then 2
            when ethnicity is null then 3
            else 4 end 
    ) as RowNum
    from ds_wi_dwd.promis 
    group by ssn, ethnicity 
),
birthdate_tab as (
    select ssn 
        , birth_date 
        , ROW_NUMBER() over(partition by ssn order by count(*) desc,
        --PRIORITIZE KNOWN BIRTH_DATE OVER IMPOSSIBLE, NULL OVER IMPOSSIBLE
            case when birth_date is null then 2
            when extract(year from birth_date) > 2023 then 3
            else 1 end 
    ) as RowNum
    from ds_wi_dwd.promis 
    group by ssn, birth_date  
),
mastered_tab as (
    select g.ssn, g.gender, r.race, e.ethnicity, b.birth_date 
    from gender_tab g 
    inner join race_tab r on g.ssn = r.ssn and g.RowNum = 1 and r.RowNum = 1
    inner join ethnicity_tab e on g.ssn = e.ssn and e.RowNum = 1
    inner join birthdate_tab b on g.ssn = b.ssn and b.RowNum = 1
),
--MAKE SURE WE GET EVERYONE IN WAGE RECORDS WHO DOESN'T SHOW UP IN PROMIS TOO
unique_ssn_wage as (
    select distinct ssn
    from ds_wi_dwd.ui_wage uw 
)
--JOIN EVERYONE IN PROMIS TO EVERYONE IN WAGE WHO DOESN'T SHOW UP IN PROMIS TO GET FULL SET OF PEOPLE 
select 
coalesce(m.ssn, u.ssn) as ssn 
, m.gender, m.race
, m.ethnicity
, m.birth_date
from mastered_tab m 
full outer join unique_ssn_wage u 
on m.ssn = u.ssn ;

04 Fact Table Load

insert into tr_wi_2023.wi_fact_weekly_observation(
    person_id  
    ,week_ending_date
    ,week_id  
    ,benefit_claimed 
    ,benefit_yr_start
    ,normal_benefit_received
    ,ic_claim_date
    ,last_employer_naics
    ,last_employer
    ,eligible_benefit_amount
    ,earnings_during_wk
    ,entitlement
    ,veteran_status
    ,commuter
    ,education
    ,disability
    ,ic_type
    ,occupation
    ,program_type
    ,status_code
    ,stop_payment_indicator
    ,waiting_week
    ,res_zip
    ,employed_in_quarter 
    ,calendar_year
    ,calendar_quarter
    ,primary_employer_id
    ,primary_employer_wages
    ,total_wages
    ,employer_count
)
--FOR HERE: SSN WHERE CLAUSE TO SPEED UP QUERY
--get all week/person combos 
with person_quarter_combos as (
    select wmp.ssn, wrw.week_code  
    from tr_wi_2023.wi_mdim_person wmp 
    cross join tr_wi_2023.wi_rdim_week wrw 
    --where wmp.ssn = 'REDACTED'
),
promis_info as (
    select 
    p.ssn
    ,week_ending_date as claim_week
    ,effective_date_of_claim as benefit_yr_start 
    ,case 
        when ic_type is null and monetarily_eligible = 'Y' and stop_payment_indicator = 'N' and waiting_week = 'N' and entitlement = 0 then 'Y'
        else 'N' 
        end as normal_benefit_received
    ,ic_claim_date 
    ,last_employer_naics
    ,ui_number as last_employer 
    ,weekly_benefit_amount as eligible_benefit_amount 
    ,earnings_during_wk 
    ,entitlement 
    ,veteran_status
    ,commuter 
    ,education 
    ,disability 
    ,ic_type 
    ,substring(occupation, 1, 2) as occupation 
    ,program_type 
    ,status_code 
    ,stop_payment_indicator 
    ,waiting_week 
    ,res_zip 
    from ds_wi_dwd.promis p 
),
--FOR NOW: IGNORE ALL WAGE RECORDS WHERE UI ACCOUNT NUMBER ISN'T ALL INTEGERS
Wage_Rank AS (
    SELECT
    w.ssn,
    w.year,
    w.quarter,
    ROW_NUMBER() OVER(PARTITION BY w.ssn, w.year, w.quarter ORDER BY w.wage DESC) AS RANK,
    w.wage,
    w.ui_account
    FROM ds_wi_dwd.ui_wage w
    where w.wage > 0 and substring(w.ui_account,1,1) != 'M' 
),
Primary_Employer_Wage AS (
    select
    WR.ssn,
    WR.year,
    WR.quarter,
    WR.wage AS Primary_Employer_Wages,
    WR.ui_account as Primary_Employer_ID
    from Wage_Rank WR
    WHERE
    WR.RANK=1
),
All_Employer_Wage AS (
    SELECT 
    WR.ssn,
    WR.year,
    WR.quarter,
    COUNT(WR.ui_account) AS Employer_Count,
    SUM(WR.wage) AS Total_Wages
    FROM 
    Wage_Rank WR
    GROUP BY
    WR.ssn,
    WR.year, 
    WR.quarter 
)
select person.person_id
,pq.week_code as week_ending_date
,week.week_id
,case when pi.claim_week is null then 'N' else 'Y' end as benefit_claimed
,pi.benefit_yr_start
,case when pi.normal_benefit_received is null then 'N' else pi.normal_benefit_received end as normal_benefit_received
,pi.ic_claim_date
,pi.last_employer_naics
,pi.last_employer
,pi.eligible_benefit_amount 
,pi.earnings_during_wk
,pi.entitlement 
,pi.veteran_status
,pi.commuter 
,pi.education 
,pi.disability 
,pi.ic_type 
,pi.occupation 
,pi.program_type 
,pi.status_code 
,pi.stop_payment_indicator 
,pi.waiting_week 
,pi.res_zip
,CASE WHEN PEW.ssn IS NULL THEN 'N' ELSE 'Y' END AS employed_in_quarter
,week.calendar_year
,week.calendar_quarter
,PEW.Primary_Employer_ID as primary_empoyer_id
,PEW.Primary_Employer_Wages as primary_employer_wages
,AEW.Total_Wages as total_wages
,AEW.Employer_Count as employer_count
from person_quarter_combos pq
join tr_wi_2023.wi_mdim_person person on (pq.ssn = person.ssn)
join tr_wi_2023.wi_rdim_week week on (pq.week_code = week.week_code)
left join promis_info pi on (pq.ssn = pi.ssn) and (pq.week_code = pi.claim_week)
LEFT JOIN Primary_Employer_Wage PEW ON (PEW.ssn=pq.ssn) AND (PEW.quarter=week.calendar_quarter) and (PEW.year = week.calendar_year) 
LEFT JOIN All_Employer_Wage AEW ON (AEW.ssn=pq.ssn) AND (AEW.quarter=week.calendar_quarter) and (AEW.year = week.calendar_year)
order by person.person_id, pq.week_code;